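#### Adds raking weights (column `the.wts`) to a state sample.
#### Expects `DF` (the sample) and `state` (the state name) to already exist.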

# Build a data.table from DF. `n` records each row's position in DF so that
# results computed on the filtered table can be joined back to DF later.
DT <- data.table(DF)
# seq_len() (via .N) is safe even when the table has zero rows, unlike 1:nrow().
DT[, n := seq_len(.N)]
# Keep only rows with bot == 0 (presumably non-bot respondents -- confirm).
DT <- DT[bot == 0]
# Population margins used for raking: one data frame per weighting variable,
# in the order age, race, gender, ethnicity, education. Each `Freq` is the
# state's population share scaled to the current sample size, nrow(DT).
# Built inside local() so the intermediate objects do not linger in the
# workspace.
pop.targs <- local({
  # Fail fast on a malformed `state` instead of leaving pop.targs undefined
  # (or silently reusing a stale one from an earlier run).
  if (length(state) != 1L || is.na(state)) {
    stop("`state` must be a single non-missing value.", call. = FALSE)
  }

  # Population shares per state, listed in the same category order as the
  # level vectors used below.
  shares <- switch(
    as.character(state),
    "Colorado" = list(
      age    = c(0.213, 0.277, 0.31, 0.20),
      race   = c(0.765, 0.037, 0.037, 0.161),
      gender = c(0.504, 0.496),
      ethnic = c(0.168, 0.832),
      educ   = c(0.281, 0.299, 0.265, 0.155)
    ),
    "Georgia" = list(
      age    = c(0.216, 0.253, 0.331, 0.2),
      race   = c(0.568, 0.314, 0.034, 0.083),
      gender = c(0.478, 0.522),
      ethnic = c(0.059, 0.941),
      educ   = c(0.385, 0.299, 0.197, 0.12)
    ),
    "Texas" = list(
      age    = c(0.228, 0.270, 0.313, 0.189),
      race   = c(0.551, 0.129, 0.05, 0.271),
      # NOTE(review): these two shares sum to 1.001 -- confirm against source.
      gender = c(0.492, 0.509),
      ethnic = c(0.319, 0.681),
      educ   = c(0.372, 0.314, 0.209, 0.106)
    ),
    "LA" = list(
      age    = c(0.227, 0.267, 0.305, 0.201),
      race   = c(0.356, 0.093, 0.167, 0.383),
      gender = c(0.486, 0.514),
      ethnic = c(0.409, 0.591),
      educ   = c(0.345, 0.298, 0.235, 0.123)
    ),
    stop(
      "No population targets defined for state '", state,
      "'. Expected one of: Colorado, Georgia, Texas, LA.",
      call. = FALSE
    )
  )

  n_resp <- nrow(DT)

  list(
    # age
    data.frame(age_cat = c("18-29", "30-44", "45-64", "65-105"),
               Freq = n_resp * shares[["age"]]),
    # race
    data.frame(race_cat = c("white", "black", "asian", "other"),
               Freq = n_resp * shares[["race"]]),
    # gender
    data.frame(gender_cat = c("male", "female"),
               Freq = n_resp * shares[["gender"]]),
    # ethnicity
    data.frame(ethnic_cat = c("hisp", "not hisp"),
               Freq = n_resp * shares[["ethnic"]]),
    # educ
    data.frame(educ_cat = c("high school", "some college", "college", "grad"),
               Freq = n_resp * shares[["educ"]])
  )
})

# Age category.
# Impute missing ages by resampling, with replacement, from the observed ages.
# The seed is reset right before the draw so this imputation is reproducible
# on its own.
set.seed(2022)
DT[is.na(age), age := sample(DT[!is.na(age), age], .N, replace = TRUE)]
# Collapse to the four brackets used by the population targets. Ages 100-105
# get their own term so they fall into '65-105' instead of being left
# unrecoded; it is kept as a separate range (rather than widening 65:99) so
# the spec also behaves if `age` is stored as text -- TODO confirm column type.
DT[, age_cat := recode(
  age,
  "18:29='18-29';30:44='30-44';45:64='45-64';65:99='65-105';100:105='65-105'"
)]

# Educ.
# Treat the code -3105 as missing, then impute missing values by resampling,
# with replacement, from the observed education values. The seed is reset
# right before the draw so this imputation is reproducible on its own.
set.seed(2022)
DT[education == "-3105", education := NA]
DT[
  is.na(education),
  education := sample(DT[!is.na(education), education], .N, replace = TRUE)
]
# Recode to four categories.
DT[, educ_cat := recode(
  education,
  "1:3='high school';4:5='some college';6='college';7:8='grad'"
)]

# Race
# `race` is taken from the `ethnicity` column. Missing values are imputed by
# resampling, with replacement, from the observed values; the seed is reset
# first so this imputation is reproducible on its own.
set.seed(2022)
DT[, race := ethnicity]
DT[is.na(race), race := sample(DT[!is.na(race), race], .N, replace = TRUE)]
# Recode to 4 values: white, black, asian, other. The ranges are deliberately
# left split exactly as they were.
DT[, race_cat := recode(
  race,
  "1='white';2='black';4:5='asian';6:7='asian';8:9='asian';10='asian';11:16='other';3='other'"
)]

# Ethnicity
# Impute missing values by resampling, with replacement, from the observed
# `hispanic` values; the seed is reset first so this imputation is
# reproducible on its own.
set.seed(2022)
DT[
  is.na(hispanic),
  hispanic := sample(DT[!is.na(hispanic), hispanic], .N, replace = TRUE)
]
# Recode to 2 values: code 1 is 'not hisp', codes 2-16 are 'hisp'. The ranges
# are deliberately left split exactly as they were.
DT[, ethnic_cat := recode(
  hispanic,
  "1='not hisp';2:6='hisp';6:8='hisp';9='hisp';10='hisp';11='hisp';12:16='hisp'"
)]

# Gender
# Impute missing values by resampling, with replacement, from the observed
# values; the seed is reset first so this imputation is reproducible on its
# own.
set.seed(2022)
DT[
  is.na(gender),
  gender := sample(DT[!is.na(gender), gender], .N, replace = TRUE)
]
# Only codes 1 and 2 are mapped.
# NOTE(review): any other code would presumably be left as-is and would not
# match the male/female population margin -- confirm none occur.
DT[, gender_cat := recode(gender, "1='male';2='female'")]

# Sanity checks: each of these should be FALSE before raking.
anyNA(DT[["age_cat"]])
anyNA(DT[["race_cat"]])
anyNA(DT[["ethnic_cat"]])
anyNA(DT[["gender_cat"]])
anyNA(DT[["educ_cat"]])

# Sample counts per category, for comparison with the population targets.
table(DT[["age_cat"]])
table(DT[["race_cat"]])
table(DT[["ethnic_cat"]])
table(DT[["gender_cat"]])
table(DT[["educ_cat"]])

library(survey)
# Survey design object for the Lucid data: no clustering (ids = ~1) and no
# starting weights.
dt.svy <- svydesign(ids = ~1, data = DT, weights = NULL)

# Rake the design to every population margin. The order of the sample margins
# must line up with the order of the data frames in pop.targs.
dt.svy.rk <- rake(
  dt.svy,
  sample.margins = list(
    ~age_cat,
    ~race_cat,
    ~gender_cat,
    ~ethnic_cat,
    ~educ_cat
  ),
  population.margins = pop.targs
)


# Pull the raked weights onto the filtered table, then merge them back onto
# the full data by the row id `n`. Rows of DF that were filtered out of DT
# have no match in the join and therefore get NA for `the.wts`.
DT[, the.wts := weights(dt.svy.rk)]
x <- DT[, .(n, the.wts)]
# seq_len() is safe for a zero-row DF, unlike 1:nrow(DF).
DF$n <- seq_len(nrow(DF))

DF <- left_join(DF, x, by = "n")
# Drop the intermediate objects; DF (now with `n` and `the.wts`) is the result.
rm(DT, dt.svy, pop.targs, x)
